Importing libraries and required files

In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import plotly.offline as offline
import itertools
from sklearn.metrics import mean_squared_error
from math import sqrt
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import statsmodels.api as sm
import matplotlib
import warnings
warnings.filterwarnings("ignore")
from statsmodels.tsa.stattools import adfuller

offline.init_notebook_mode()
xls=pd.ExcelFile("PET_PRI_SPT_S1_M.xls")
df_crudeOil=pd.read_excel(xls,"Data 1")
df_cGasoline=pd.read_excel(xls,"Data 2")
df_rGasoline=pd.read_excel(xls,"Data 3")
df_heatOil=pd.read_excel(xls,"Data 4")
df_diesel=pd.read_excel(xls,"Data 5")
df_jet=pd.read_excel(xls,"Data 6")
df_prop=pd.read_excel(xls,"Data 7")

Cleaning Datasets

In [2]:
def cleanData(data):
    data.columns=data.iloc[1,:]
    data=data.iloc[2:,:]
    return data
In [3]:
#Crude Oil Dataset
df_crudeOil=cleanData(df_crudeOil)
df_crudeOil.head()
Out[3]:
1 Date Cushing, OK WTI Spot Price FOB (Dollars per Barrel) Europe Brent Spot Price FOB (Dollars per Barrel)
2 1986-01-15 00:00:00 22.93 NaN
3 1986-02-15 00:00:00 15.46 NaN
4 1986-03-15 00:00:00 12.61 NaN
5 1986-04-15 00:00:00 12.84 NaN
6 1986-05-15 00:00:00 15.38 NaN
In [4]:
#C Gasoline Dataset
df_cGasoline=cleanData(df_cGasoline)
df_cGasoline.head()
Out[4]:
1 Date New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon) U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)
2 1986-06-15 00:00:00 0.42 0.409
3 1986-07-15 00:00:00 0.34 0.334
4 1986-08-15 00:00:00 0.426 0.416
5 1986-09-15 00:00:00 0.42 0.398
6 1986-10-15 00:00:00 0.41 0.4
In [5]:
#R Gasoline Dataset
df_rGasoline=cleanData(df_rGasoline)
df_rGasoline.head()
Out[5]:
1 Date Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon)
2 2003-06-15 00:00:00 1.072
3 2003-07-15 00:00:00 0.965
4 2003-08-15 00:00:00 1.315
5 2003-09-15 00:00:00 0.949
6 2003-10-15 00:00:00 0.996
In [6]:
#Heat Oil Dataset
df_heatOil=cleanData(df_heatOil)
df_heatOil.head()
Out[6]:
1 Date New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon)
2 1986-06-15 00:00:00 0.38
3 1986-07-15 00:00:00 0.334
4 1986-08-15 00:00:00 0.408
5 1986-09-15 00:00:00 0.402
6 1986-10-15 00:00:00 0.394
In [7]:
#Diesel Dataset
df_diesel=cleanData(df_diesel)
df_diesel.head()
Out[7]:
1 Date New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon) U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon) Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon)
2 1996-04-15 00:00:00 NaN NaN 0.95
3 1996-05-15 00:00:00 NaN NaN 0.87
4 1996-06-15 00:00:00 NaN NaN 0.745
5 1996-07-15 00:00:00 NaN NaN 0.692
6 1996-08-15 00:00:00 NaN NaN 0.705
In [8]:
#Jet Fuel Dataset
df_jet=cleanData(df_jet)
df_jet.head()
Out[8]:
1 Date U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon)
2 1990-04-15 00:00:00 0.54
3 1990-05-15 00:00:00 0.515
4 1990-06-15 00:00:00 0.494
5 1990-07-15 00:00:00 0.535
6 1990-08-15 00:00:00 0.791
In [9]:
#Propane Dataset
df_prop=cleanData(df_prop)
df_prop.head()
Out[9]:
1 Date Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon)
2 1992-06-15 00:00:00 0.344
3 1992-07-15 00:00:00 0.342
4 1992-08-15 00:00:00 0.354
5 1992-09-15 00:00:00 0.375
6 1992-10-15 00:00:00 0.354

Creating column in each dataset givem to find price changes month over month

In [10]:
#Crude Oil
df_crudeOil['Cushing, OK WTI Monthly Change']=df_crudeOil['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)'].pct_change()*100
df_crudeOil['Europe Brent Monthly Change']=df_crudeOil['Europe Brent Spot Price FOB (Dollars per Barrel)'].pct_change()*100
df_crudeOil['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)']=df_crudeOil['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)'].astype('float')
df_crudeOil['Europe Brent Spot Price FOB (Dollars per Barrel)']=df_crudeOil['Europe Brent Spot Price FOB (Dollars per Barrel)'].astype('float')
df_crudeOil.Date = pd.to_datetime(df_crudeOil.Date)
df_crudeOil.set_index('Date', inplace=True)
df_crudeOil
Out[10]:
1 Cushing, OK WTI Spot Price FOB (Dollars per Barrel) Europe Brent Spot Price FOB (Dollars per Barrel) Cushing, OK WTI Monthly Change Europe Brent Monthly Change
Date
1986-01-15 22.93 NaN NaN NaN
1986-02-15 15.46 NaN -32.577410 NaN
1986-03-15 12.61 NaN -18.434670 NaN
1986-04-15 12.84 NaN 1.823949 NaN
1986-05-15 15.38 NaN 19.781931 NaN
1986-06-15 13.43 NaN -12.678804 NaN
1986-07-15 11.59 NaN -13.700670 NaN
1986-08-15 15.10 NaN 30.284728 NaN
1986-09-15 14.87 NaN -1.523179 NaN
1986-10-15 14.90 NaN 0.201748 NaN
1986-11-15 15.22 NaN 2.147651 NaN
1986-12-15 16.11 NaN 5.847569 NaN
1987-01-15 18.65 NaN 15.766605 NaN
1987-02-15 17.75 NaN -4.825737 NaN
1987-03-15 18.30 NaN 3.098592 NaN
1987-04-15 18.68 NaN 2.076503 NaN
1987-05-15 19.44 18.58 4.068522 NaN
1987-06-15 20.07 18.86 3.240741 1.506997
1987-07-15 21.34 19.86 6.327853 5.302227
1987-08-15 20.31 18.98 -4.826617 -4.431017
1987-09-15 19.53 18.31 -3.840473 -3.530032
1987-10-15 19.86 18.76 1.689708 2.457673
1987-11-15 18.85 17.78 -5.085599 -5.223881
1987-12-15 17.28 17.05 -8.328912 -4.105737
1988-01-15 17.13 16.75 -0.868056 -1.759531
1988-02-15 16.80 15.73 -1.926445 -6.089552
1988-03-15 16.20 14.73 -3.571429 -6.357279
1988-04-15 17.86 16.60 10.246914 12.695180
1988-05-15 17.42 16.31 -2.463606 -1.746988
1988-06-15 16.53 15.54 -5.109070 -4.721030
... ... ... ... ...
2016-08-15 44.72 45.84 0.156775 1.979978
2016-09-15 45.18 46.57 1.028623 1.592496
2016-10-15 49.78 49.52 10.181496 6.334550
2016-11-15 45.66 44.73 -8.276416 -9.672859
2016-12-15 51.97 53.31 13.819536 19.181757
2017-01-15 52.50 54.58 1.019819 2.382292
2017-02-15 53.47 54.87 1.847619 0.531330
2017-03-15 49.33 51.59 -7.742659 -5.977766
2017-04-15 51.06 52.31 3.506994 1.395619
2017-05-15 48.48 50.33 -5.052879 -3.785127
2017-06-15 45.18 46.37 -6.806931 -7.868071
2017-07-15 46.63 48.48 3.209385 4.550356
2017-08-15 48.04 51.70 3.023804 6.641914
2017-09-15 49.82 56.15 3.705246 8.607350
2017-10-15 51.58 57.51 3.532718 2.422084
2017-11-15 56.64 62.71 9.810004 9.041906
2017-12-15 57.88 64.37 2.189266 2.647106
2018-01-15 63.70 69.08 10.055287 7.317073
2018-02-15 62.23 65.32 -2.307692 -5.442965
2018-03-15 62.73 66.02 0.803471 1.071647
2018-04-15 66.25 72.11 5.611350 9.224477
2018-05-15 69.98 76.98 5.630189 6.753571
2018-06-15 67.87 74.41 -3.015147 -3.338529
2018-07-15 70.98 74.25 4.582290 -0.215025
2018-08-15 68.06 72.53 -4.113835 -2.316498
2018-09-15 70.23 78.89 3.188363 8.768785
2018-10-15 70.75 81.03 0.740424 2.712638
2018-11-15 56.96 64.75 -19.491166 -20.091324
2018-12-15 49.52 57.36 -13.061798 -11.413127
2019-01-15 51.38 59.41 3.756058 3.573919

397 rows × 4 columns

In [11]:
#Converntional Gasoline
df_cGasoline['New York Harbor Conventional Gasoline Monthly Change']=df_cGasoline['New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)'].pct_change()*100
df_cGasoline['U.S. Gulf Coast Conventional Gasoline Monthly Change']=df_cGasoline['U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)'].pct_change()*100
df_cGasoline['New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)']=df_cGasoline['New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)'].astype('float')
df_cGasoline['U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)']=df_cGasoline['U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)'].astype('float')
df_cGasoline.Date = pd.to_datetime(df_cGasoline.Date)
df_cGasoline.set_index('Date', inplace=True)
df_cGasoline
Out[11]:
1 New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon) U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon) New York Harbor Conventional Gasoline Monthly Change U.S. Gulf Coast Conventional Gasoline Monthly Change
Date
1986-06-15 0.420 0.409 NaN NaN
1986-07-15 0.340 0.334 -19.047619 -18.337408
1986-08-15 0.426 0.416 25.294118 24.550898
1986-09-15 0.420 0.398 -1.408451 -4.326923
1986-10-15 0.410 0.400 -2.380952 0.502513
1986-11-15 0.411 0.409 0.243902 2.250000
1986-12-15 0.434 0.414 5.596107 1.222494
1987-01-15 0.489 0.492 12.672811 18.840580
1987-02-15 0.474 0.481 -3.067485 -2.235772
1987-03-15 0.509 0.506 7.383966 5.197505
1987-04-15 0.504 0.505 -0.982318 -0.197628
1987-05-15 0.542 0.528 7.539683 4.554455
1987-06-15 0.557 0.535 2.767528 1.325758
1987-07-15 0.556 0.552 -0.179533 3.177570
1987-08-15 0.523 0.530 -5.935252 -3.985507
1987-09-15 0.518 0.496 -0.956023 -6.415094
1987-10-15 0.541 0.510 4.440154 2.822581
1987-11-15 0.516 0.497 -4.621072 -2.549020
1987-12-15 0.454 0.432 -12.015504 -13.078471
1988-01-15 0.449 0.433 -1.101322 0.231481
1988-02-15 0.461 0.452 2.672606 4.387991
1988-03-15 0.452 0.456 -1.952278 0.884956
1988-04-15 0.507 0.511 12.168142 12.061404
1988-05-15 0.522 0.510 2.958580 -0.195695
1988-06-15 0.518 0.498 -0.766284 -2.352941
1988-07-15 0.554 0.557 6.949807 11.847390
1988-08-15 0.483 0.482 -12.815884 -13.464991
1988-09-15 0.466 0.470 -3.519669 -2.489627
1988-10-15 0.511 0.498 9.656652 5.957447
1988-11-15 0.521 0.475 1.956947 -4.618474
... ... ... ... ...
2016-08-15 1.379 1.429 1.846381 5.228277
2016-09-15 1.438 1.393 4.278463 -2.519244
2016-10-15 1.522 1.489 5.841446 6.891601
2016-11-15 1.462 1.317 -3.942181 -11.551377
2016-12-15 1.634 1.566 11.764706 18.906606
2017-01-15 1.620 1.593 -0.856793 1.724138
2017-02-15 1.547 1.543 -4.506173 -3.138732
2017-03-15 1.492 1.523 -3.555268 -1.296176
2017-04-15 1.611 1.621 7.975871 6.434668
2017-05-15 1.540 1.519 -4.407200 -6.292412
2017-06-15 1.445 1.432 -6.168831 -5.727452
2017-07-15 1.562 1.518 8.096886 6.005587
2017-08-15 1.688 1.638 8.066581 7.905138
2017-09-15 1.867 1.753 10.604265 7.020757
2017-10-15 1.715 1.652 -8.141403 -5.761552
2017-11-15 1.830 1.757 6.705539 6.355932
2017-12-15 1.757 1.702 -3.989071 -3.130336
2018-01-15 1.899 1.857 8.081958 9.106933
2018-02-15 1.817 1.765 -4.318062 -4.954227
2018-03-15 1.834 1.820 0.935608 3.116147
2018-04-15 1.995 1.965 8.778626 7.967033
2018-05-15 2.129 2.091 6.716792 6.412214
2018-06-15 2.030 2.002 -4.650070 -4.256337
2018-07-15 2.074 2.043 2.167488 2.047952
2018-08-15 2.077 2.053 0.144648 0.489476
2018-09-15 2.093 2.045 0.770342 -0.389674
2018-10-15 2.028 1.969 -3.105590 -3.716381
2018-11-15 1.625 1.546 -19.871795 -21.482986
2018-12-15 1.449 1.357 -10.830769 -12.225097
2019-01-15 1.425 1.353 -1.656315 -0.294768

392 rows × 4 columns

In [12]:
#Regular Gasoline
df_rGasoline['Los Angeles Reformulated RBOB Regular Gasoline Monthly Change']=df_rGasoline['Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon)'].pct_change()*100
df_rGasoline['Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon)']=df_rGasoline['Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon)'].astype('float')
df_rGasoline.Date = pd.to_datetime(df_rGasoline.Date)
df_rGasoline.set_index('Date', inplace=True)
df_rGasoline
Out[12]:
1 Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon) Los Angeles Reformulated RBOB Regular Gasoline Monthly Change
Date
2003-06-15 1.072 NaN
2003-07-15 0.965 -9.981343
2003-08-15 1.315 36.269430
2003-09-15 0.949 -27.832700
2003-10-15 0.996 4.952582
2003-11-15 1.020 2.409639
2003-12-15 0.959 -5.980392
2004-01-15 1.079 12.513034
2004-02-15 1.344 24.559778
2004-03-15 1.323 -1.562500
2004-04-15 1.426 7.785336
2004-05-15 1.649 15.638149
2004-06-15 1.457 -11.643420
2004-07-15 1.467 0.686342
2004-08-15 1.388 -5.385140
2004-09-15 1.481 6.700288
2004-10-15 1.642 10.871033
2004-11-15 1.404 -14.494519
2004-12-15 1.185 -15.598291
2005-01-15 1.379 16.371308
2005-02-15 1.545 12.037708
2005-03-15 1.718 11.197411
2005-04-15 1.829 6.461001
2005-05-15 1.600 -12.520503
2005-06-15 1.745 9.062500
2005-07-15 1.882 7.851003
2005-08-15 2.142 13.815090
2005-09-15 2.278 6.349206
2005-10-15 1.902 -16.505707
2005-11-15 1.577 -17.087277
... ... ...
2016-08-15 1.388 -1.699717
2016-09-15 1.552 11.815562
2016-10-15 1.619 4.317010
2016-11-15 1.434 -11.426807
2016-12-15 1.500 4.602510
2017-01-15 1.612 7.466667
2017-02-15 1.756 8.933002
2017-03-15 1.720 -2.050114
2017-04-15 1.777 3.313953
2017-05-15 1.746 -1.744513
2017-06-15 1.619 -7.273769
2017-07-15 1.669 3.088326
2017-08-15 1.807 8.268424
2017-09-15 1.823 0.885445
2017-10-15 1.761 -3.400987
2017-11-15 1.836 4.258944
2017-12-15 1.646 -10.348584
2018-01-15 1.944 18.104496
2018-02-15 1.877 -3.446502
2018-03-15 2.080 10.815131
2018-04-15 2.213 6.394231
2018-05-15 2.295 3.705377
2018-06-15 2.136 -6.928105
2018-07-15 2.140 0.187266
2018-08-15 2.078 -2.897196
2018-09-15 2.224 7.025987
2018-10-15 2.254 1.348921
2018-11-15 1.739 -22.848270
2018-12-15 1.597 -8.165612
2019-01-15 1.607 0.626174

188 rows × 2 columns

In [13]:
#Heat Oil
df_heatOil['New York Harbor No. 2 Heating Oil Monthly Change']=df_heatOil['New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon)'].pct_change()*100
df_heatOil['New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon)']=df_heatOil['New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon)'].astype('float')
df_heatOil.Date = pd.to_datetime(df_heatOil.Date)
df_heatOil.set_index('Date', inplace=True)
df_heatOil
Out[13]:
1 New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon) New York Harbor No. 2 Heating Oil Monthly Change
Date
1986-06-15 0.380 NaN
1986-07-15 0.334 -12.105263
1986-08-15 0.408 22.155689
1986-09-15 0.402 -1.470588
1986-10-15 0.394 -1.990050
1986-11-15 0.422 7.106599
1986-12-15 0.447 5.924171
1987-01-15 0.521 16.554810
1987-02-15 0.474 -9.021113
1987-03-15 0.491 3.586498
1987-04-15 0.485 -1.221996
1987-05-15 0.518 6.804124
1987-06-15 0.523 0.965251
1987-07-15 0.547 4.588910
1987-08-15 0.524 -4.204753
1987-09-15 0.523 -0.190840
1987-10-15 0.563 7.648184
1987-11-15 0.559 -0.710480
1987-12-15 0.536 -4.114490
1988-01-15 0.518 -3.358209
1988-02-15 0.481 -7.142857
1988-03-15 0.475 -1.247401
1988-04-15 0.515 8.421053
1988-05-15 0.509 -1.165049
1988-06-15 0.448 -11.984283
1988-07-15 0.427 -4.687500
1988-08-15 0.434 1.639344
1988-09-15 0.406 -6.451613
1988-10-15 0.402 -0.985222
1988-11-15 0.449 11.691542
... ... ...
2016-08-15 1.325 2.554180
2016-09-15 1.351 1.962264
2016-10-15 1.488 10.140637
2016-11-15 1.390 -6.586022
2016-12-15 1.553 11.726619
2017-01-15 1.551 -0.128783
2017-02-15 1.562 0.709220
2017-03-15 1.492 -4.481434
2017-04-15 1.523 2.077748
2017-05-15 1.454 -4.530532
2017-06-15 1.332 -8.390646
2017-07-15 1.423 6.831832
2017-08-15 1.519 6.746311
2017-09-15 1.708 12.442396
2017-10-15 1.707 -0.058548
2017-11-15 1.823 6.795548
2017-12-15 1.863 2.194185
2018-01-15 2.017 8.266237
2018-02-15 1.853 -8.130887
2018-03-15 1.874 1.133297
2018-04-15 2.037 8.697972
2018-05-15 2.187 7.363770
2018-06-15 2.112 -3.429355
2018-07-15 2.108 -0.189394
2018-08-15 2.125 0.806452
2018-09-15 2.225 4.705882
2018-10-15 2.313 3.955056
2018-11-15 2.033 -12.105491
2018-12-15 1.784 -12.247909
2019-01-15 1.819 1.961883

392 rows × 2 columns

In [14]:
#Diesel Fuel
df_diesel['New York Harbor Monthly Change']=df_diesel['New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)'].pct_change()*100
df_diesel['U.S. Gulf Coast Monthly Change']=df_diesel['U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)'].pct_change()*100
df_diesel['Los Angeles, CA Monthly Change']=df_diesel['Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon)'].pct_change()*100
df_diesel['New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)']=df_diesel['New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)'].astype('float')
df_diesel['U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)']=df_diesel['U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)'].astype('float')
df_diesel['Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon)']=df_diesel['Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon)'].astype('float')
df_diesel.Date = pd.to_datetime(df_diesel.Date)
df_diesel.set_index('Date', inplace=True)
df_diesel
Out[14]:
1 New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon) U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon) Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon) New York Harbor Monthly Change U.S. Gulf Coast Monthly Change Los Angeles, CA Monthly Change
Date
1996-04-15 NaN NaN 0.950 NaN NaN NaN
1996-05-15 NaN NaN 0.870 NaN NaN -8.421053
1996-06-15 NaN NaN 0.745 NaN NaN -14.367816
1996-07-15 NaN NaN 0.692 NaN NaN -7.114094
1996-08-15 NaN NaN 0.705 NaN NaN 1.878613
1996-09-15 NaN NaN 0.768 NaN NaN 8.936170
1996-10-15 NaN NaN 0.817 NaN NaN 6.380208
1996-11-15 NaN NaN 0.741 NaN NaN -9.302326
1996-12-15 NaN NaN 0.735 NaN NaN -0.809717
1997-01-15 NaN NaN 0.773 NaN NaN 5.170068
1997-02-15 NaN NaN 0.789 NaN NaN 2.069858
1997-03-15 NaN NaN 0.751 NaN NaN -4.816223
1997-04-15 NaN NaN 0.731 NaN NaN -2.663116
1997-05-15 NaN NaN 0.623 NaN NaN -14.774282
1997-06-15 NaN NaN 0.589 NaN NaN -5.457464
1997-07-15 NaN NaN 0.571 NaN NaN -3.056027
1997-08-15 NaN NaN 0.679 NaN NaN 18.914186
1997-09-15 NaN NaN 0.645 NaN NaN -5.007364
1997-10-15 NaN NaN 0.685 NaN NaN 6.201550
1997-11-15 NaN NaN 0.703 NaN NaN 2.627737
1997-12-15 NaN NaN 0.601 NaN NaN -14.509246
1998-01-15 NaN NaN 0.566 NaN NaN -5.823627
1998-02-15 NaN NaN 0.497 NaN NaN -12.190813
1998-03-15 NaN NaN 0.479 NaN NaN -3.621730
1998-04-15 NaN NaN 0.552 NaN NaN 15.240084
1998-05-15 NaN NaN 0.518 NaN NaN -6.159420
1998-06-15 NaN NaN 0.469 NaN NaN -9.459459
1998-07-15 NaN NaN 0.470 NaN NaN 0.213220
1998-08-15 NaN NaN 0.463 NaN NaN -1.489362
1998-09-15 NaN NaN 0.482 NaN NaN 4.103672
... ... ... ... ... ... ...
2016-08-15 1.399 1.379 1.400 2.116788 2.680566 0.000000
2016-09-15 1.418 1.400 1.455 1.358113 1.522843 3.928571
2016-10-15 1.559 1.544 1.630 9.943583 10.285714 12.027491
2016-11-15 1.463 1.427 1.539 -6.157793 -7.577720 -5.582822
2016-12-15 1.642 1.596 1.656 12.235133 11.843027 7.602339
2017-01-15 1.623 1.593 1.671 -1.157125 -0.187970 0.905797
2017-02-15 1.624 1.608 1.670 0.061614 0.941620 -0.059844
2017-03-15 1.528 1.502 1.563 -5.911330 -6.592040 -6.407186
2017-04-15 1.586 1.558 1.639 3.795812 3.728362 4.862444
2017-05-15 1.514 1.482 1.547 -4.539723 -4.878049 -5.613179
2017-06-15 1.417 1.386 1.460 -6.406869 -6.477733 -5.623788
2017-07-15 1.521 1.486 1.570 7.339450 7.215007 7.534247
2017-08-15 1.631 1.601 1.697 7.232084 7.738896 8.089172
2017-09-15 1.791 1.777 1.915 9.809933 10.993129 12.846199
2017-10-15 1.799 1.756 1.863 0.446678 -1.181767 -2.715405
2017-11-15 1.916 1.851 1.975 6.503613 5.410023 6.011809
2017-12-15 1.944 1.867 1.907 1.461378 0.864398 -3.443038
2018-01-15 2.074 1.996 2.056 6.687243 6.909480 7.813319
2018-02-15 1.937 1.895 1.963 -6.605593 -5.060120 -4.523346
2018-03-15 1.930 1.895 1.996 -0.361384 0.000000 1.681100
2018-04-15 2.079 2.029 2.154 7.720207 7.071240 7.915832
2018-05-15 2.230 2.181 2.288 7.263107 7.491375 6.220984
2018-06-15 2.146 2.102 2.185 -3.766816 -3.622192 -4.501748
2018-07-15 2.140 2.098 2.177 -0.279590 -0.190295 -0.366133
2018-08-15 2.153 2.114 2.205 0.607477 0.762631 1.286174
2018-09-15 2.252 2.210 2.293 4.598235 4.541154 3.990930
2018-10-15 2.332 2.282 2.369 3.552398 3.257919 3.314435
2018-11-15 2.048 1.962 2.055 -12.178388 -14.022787 -13.254538
2018-12-15 1.801 1.687 1.793 -12.060547 -14.016310 -12.749392
2019-01-15 1.846 1.774 1.812 2.498612 5.157084 1.059677

274 rows × 6 columns

In [15]:
#Jet Fuel
df_jet['U.S. Gulf Coast Monthly Change']=df_jet['U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon)'].pct_change()*100
df_jet['U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon)']=df_jet['U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon)'].astype('float')
df_jet.Date = pd.to_datetime(df_jet.Date)
df_jet.set_index('Date', inplace=True)
df_jet
Out[15]:
1 U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon) U.S. Gulf Coast Monthly Change
Date
1990-04-15 0.540 NaN
1990-05-15 0.515 -4.629630
1990-06-15 0.494 -4.077670
1990-07-15 0.535 8.299595
1990-08-15 0.791 47.850467
1990-09-15 1.012 27.939317
1990-10-15 1.196 18.181818
1990-11-15 0.971 -18.812709
1990-12-15 0.803 -17.301751
1991-01-15 0.741 -7.721046
1991-02-15 0.637 -14.035088
1991-03-15 0.558 -12.401884
1991-04-15 0.552 -1.075269
1991-05-15 0.569 3.079710
1991-06-15 0.547 -3.866432
1991-07-15 0.586 7.129799
1991-08-15 0.623 6.313993
1991-09-15 0.635 1.926164
1991-10-15 0.671 5.669291
1991-11-15 0.645 -3.874814
1991-12-15 0.522 -19.069767
1992-01-15 0.509 -2.490421
1992-02-15 0.543 6.679764
1992-03-15 0.514 -5.340700
1992-04-15 0.543 5.642023
1992-05-15 0.579 6.629834
1992-06-15 0.615 6.217617
1992-07-15 0.611 -0.650407
1992-08-15 0.590 -3.436989
1992-09-15 0.623 5.593220
... ... ...
2016-08-15 1.295 1.808176
2016-09-15 1.319 1.853282
2016-10-15 1.457 10.462472
2016-11-15 1.356 -6.932052
2016-12-15 1.491 9.955752
2017-01-15 1.514 1.542589
2017-02-15 1.547 2.179657
2017-03-15 1.445 -6.593407
2017-04-15 1.510 4.498270
2017-05-15 1.412 -6.490066
2017-06-15 1.295 -8.286119
2017-07-15 1.417 9.420849
2017-08-15 1.561 10.162315
2017-09-15 1.800 15.310698
2017-10-15 1.659 -7.833333
2017-11-15 1.760 6.088005
2017-12-15 1.817 3.238636
2018-01-15 1.952 7.429829
2018-02-15 1.849 -5.276639
2018-03-15 1.858 0.486750
2018-04-15 2.015 8.449946
2018-05-15 2.155 6.947891
2018-06-15 2.090 -3.016241
2018-07-15 2.098 0.382775
2018-08-15 2.118 0.953289
2018-09-15 2.189 3.352219
2018-10-15 2.249 2.740978
2018-11-15 1.945 -13.517119
2018-12-15 1.696 -12.802057
2019-01-15 1.784 5.188679

346 rows × 2 columns

In [16]:
#Propane
df_prop['Mont Belvieu, TX Propane Monthly Change']=df_prop['Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon)'].pct_change()*100
df_prop['Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon)']=df_prop['Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon)'].astype('float')
df_prop.Date = pd.to_datetime(df_prop.Date)
df_prop.set_index('Date', inplace=True)
df_prop
Out[16]:
1 Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon) Mont Belvieu, TX Propane Monthly Change
Date
1992-06-15 0.344 NaN
1992-07-15 0.342 -0.581395
1992-08-15 0.354 3.508772
1992-09-15 0.375 5.932203
1992-10-15 0.354 -5.600000
1992-11-15 0.327 -7.627119
1992-12-15 0.312 -4.587156
1993-01-15 0.337 8.012821
1993-02-15 0.330 -2.077151
1993-03-15 0.342 3.636364
1993-04-15 0.344 0.584795
1993-05-15 0.328 -4.651163
1993-06-15 0.328 0.000000
1993-07-15 0.314 -4.268293
1993-08-15 0.305 -2.866242
1993-09-15 0.299 -1.967213
1993-10-15 0.293 -2.006689
1993-11-15 0.275 -6.143345
1993-12-15 0.245 -10.909091
1994-01-15 0.263 7.346939
1994-02-15 0.290 10.266160
1994-03-15 0.284 -2.068966
1994-04-15 0.289 1.760563
1994-05-15 0.296 2.422145
1994-06-15 0.288 -2.702703
1994-07-15 0.292 1.388889
1994-08-15 0.300 2.739726
1994-09-15 0.299 -0.333333
1994-10-15 0.324 8.361204
1994-11-15 0.344 6.172840
... ... ...
2016-08-15 0.449 -6.066946
2016-09-15 0.495 10.244989
2016-10-15 0.573 15.757576
2016-11-15 0.538 -6.108202
2016-12-15 0.637 18.401487
2017-01-15 0.747 17.268446
2017-02-15 0.768 2.811245
2017-03-15 0.615 -19.921875
2017-04-15 0.651 5.853659
2017-05-15 0.640 -1.689708
2017-06-15 0.590 -7.812500
2017-07-15 0.647 9.661017
2017-08-15 0.758 17.156105
2017-09-15 0.883 16.490765
2017-10-15 0.934 5.775764
2017-11-15 0.980 4.925054
2017-12-15 0.957 -2.346939
2018-01-15 0.904 -5.538140
2018-02-15 0.827 -8.517699
2018-03-15 0.788 -4.715840
2018-04-15 0.821 4.187817
2018-05-15 0.917 11.693057
2018-06-15 0.880 -4.034896
2018-07-15 0.938 6.590909
2018-08-15 0.974 3.837953
2018-09-15 1.057 8.521561
2018-10-15 0.959 -9.271523
2018-11-15 0.745 -22.314911
2018-12-15 0.677 -9.127517
2019-01-15 0.665 -1.772526

320 rows × 2 columns

Summarized price and price movement by year for all petrolum products

In [17]:
def getYearlySummary(data):
    return data.resample('Y').mean()
In [18]:
#Crude Oil Yearly Summarized Table
getYearlySummary(df_crudeOil)
Out[18]:
1 Cushing, OK WTI Spot Price FOB (Dollars per Barrel) Europe Brent Spot Price FOB (Dollars per Barrel) Cushing, OK WTI Monthly Change Europe Brent Monthly Change
Date
1986-12-31 15.036667 NaN -1.711560 NaN
1987-12-31 19.171667 18.522500 0.780099 -1.146253
1988-12-31 15.982500 14.948333 -0.239611 -0.582507
1989-12-31 19.640833 18.251667 2.261132 2.389460
1990-12-31 24.467500 23.675833 3.418130 4.598114
1991-12-31 21.502500 20.010833 -2.465033 -3.154075
1992-12-31 20.563333 19.305833 0.044119 -0.033064
1993-12-31 18.450833 17.042500 -2.264988 -2.190267
1994-12-31 17.191667 15.840833 1.548474 1.380291
1995-12-31 18.439167 17.044167 0.965250 1.116250
1996-12-31 22.113333 20.635000 2.571583 2.548812
1997-12-31 20.610000 19.118333 -2.457018 -2.450318
1998-12-31 14.446667 12.779167 -3.714227 -4.232521
1999-12-31 19.260833 17.846667 7.472825 8.671402
2000-12-31 30.300833 28.522500 1.143356 0.731548
2001-12-31 25.947500 24.450000 -2.954396 -2.215634
2002-12-31 26.115000 24.963333 3.781293 3.825545
2003-12-31 31.120833 28.882500 1.054619 0.772481
2004-12-31 41.443333 38.230000 2.789403 2.763941
2005-12-31 56.492500 54.418333 2.920053 3.355490
2006-12-31 66.018333 65.146667 0.573744 1.078917
2007-12-31 72.318333 72.465000 3.544767 3.472133
2008-12-31 99.571667 96.847500 -5.339015 -5.528531
2009-12-31 61.654167 61.490000 5.530528 5.619015
2010-12-31 79.395000 79.511667 1.657553 1.853841
2011-12-31 94.874167 111.264167 1.107554 1.523111
2012-12-31 94.110833 111.651667 -0.780515 0.354714
2013-12-31 97.905833 108.637500 0.983733 0.155575
2014-12-31 93.258333 99.023333 -3.784017 -4.448734
2015-12-31 48.688333 52.353333 -3.214574 -3.321777
2016-12-31 43.144167 43.548333 3.396200 3.447007
2017-12-31 50.884167 54.247500 1.020199 1.715749
2018-12-31 64.938333 71.060833 -0.948189 -0.580773
2019-12-31 51.380000 59.410000 3.756058 3.573919
In [19]:
#Conventional Gasoline Yearly Summarized Table
getYearlySummary(df_cGasoline)
Out[19]:
1 New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon) U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon) New York Harbor Conventional Gasoline Monthly Change U.S. Gulf Coast Conventional Gasoline Monthly Change
Date
1986-12-31 0.408714 0.397143 1.382851 0.976929
1987-12-31 0.515250 0.505333 0.587246 0.621413
1988-12-31 0.493000 0.481667 0.566859 0.371622
1989-12-31 0.569667 0.557417 1.724462 2.252828
1990-12-31 0.728250 0.707500 2.908794 2.803443
1991-12-31 0.662083 0.634917 -1.414225 -1.542236
1992-12-31 0.590833 0.574667 -0.225881 0.003469
1993-12-31 0.515500 0.508583 -2.613259 -2.505896
1994-12-31 0.490333 0.479417 2.163158 1.868256
1995-12-31 0.542583 0.509250 1.549958 1.319584
1996-12-31 0.618833 0.596417 2.389770 2.768112
1997-12-31 0.603667 0.585250 -1.972954 -1.957428
1998-12-31 0.427833 0.413250 -3.994616 -3.966470
1999-12-31 0.541750 0.519500 7.771594 7.561274
2000-12-31 0.859250 0.834333 1.204309 1.502782
2001-12-31 0.740500 0.739500 -2.088952 -1.696620
2002-12-31 0.722667 0.718583 4.079436 4.204147
2003-12-31 0.885167 0.871750 1.182203 1.258588
2004-12-31 1.179667 1.168667 2.091784 2.112571
2005-12-31 1.558667 1.589500 4.185396 4.565266
2006-12-31 1.821917 1.824000 1.097165 0.840040
2007-12-31 2.059333 2.037833 3.214617 3.416356
2008-12-31 2.448333 2.468750 -5.804690 -5.357999
2009-12-31 1.659083 1.629333 6.380316 6.547560
2010-12-31 2.092250 2.051083 1.989649 1.829971
2011-12-31 2.795000 2.745750 1.000851 1.013929
2012-12-31 2.939667 2.813667 0.559886 0.072831
2013-12-31 2.812917 2.696000 0.124165 0.298184
2014-12-31 2.612333 2.488083 -3.666401 -4.185907
2015-12-31 1.611750 1.553000 -1.776860 -0.977255
2016-12-31 1.390750 1.331750 2.527379 3.099277
2017-12-31 1.639500 1.604250 0.818700 0.841630
2018-12-31 1.920833 1.876083 -1.265069 -1.490412
2019-12-31 1.425000 1.353000 -1.656315 -0.294768
In [20]:
#Regular Gasoline Yearly Summarized Table
getYearlySummary(df_rGasoline)
Out[20]:
1 Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon) Los Angeles Reformulated RBOB Regular Gasoline Monthly Change
Date
2003-12-31 1.039429 -0.027131
2004-12-31 1.403750 2.505841
2005-12-31 1.766917 3.239223
2006-12-31 2.062583 1.665185
2007-12-31 2.291833 3.017479
2008-12-31 2.626583 -4.946264
2009-12-31 1.840917 5.602993
2010-12-31 2.209667 1.691042
2011-12-31 2.894917 1.172236
2012-12-31 3.030583 0.137166
2013-12-31 2.921917 0.444020
2014-12-31 2.668083 -4.196740
2015-12-31 1.945750 2.388995
2016-12-31 1.453833 1.019094
2017-12-31 1.731000 0.949733
2018-12-31 2.048083 0.274644
2019-12-31 1.607000 0.626174
In [21]:
#Heat Oil Yearly Summarized Table
getYearlySummary(df_heatOil)
Out[21]:
1 New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon) New York Harbor No. 2 Heating Oil Monthly Change
Date
1986-12-31 0.398143 3.270093
1987-12-31 0.522000 1.723675
1988-12-31 0.464500 -0.140371
1989-12-31 0.557333 4.389577
1990-12-31 0.687833 1.049165
1991-12-31 0.612500 -3.113607
1992-12-31 0.572333 0.513530
1993-12-31 0.524750 -1.792132
1994-12-31 0.494000 1.086155
1995-12-31 0.493417 1.595513
1996-12-31 0.631000 2.246832
1997-12-31 0.563917 -2.652192
1998-12-31 0.393000 -3.764043
1999-12-31 0.488917 6.953559
2000-12-31 0.877500 3.605584
2001-12-31 0.712000 -4.587341
2002-12-31 0.679500 4.021223
2003-12-31 0.850167 1.341614
2004-12-31 1.116500 3.347040
2005-12-31 1.621417 2.761157
2006-12-31 1.803333 0.091258
2007-12-31 2.031583 3.784936
2008-12-31 2.852750 -4.043217
2009-12-31 1.641417 3.260586
2010-12-31 2.124833 1.998923
2011-12-31 2.946333 1.448881
2012-12-31 3.023750 0.448131
2013-12-31 2.925917 0.169685
2014-12-31 2.696417 -3.869339
2015-12-31 1.546750 -4.214499
2016-12-31 1.283500 3.756562
2017-12-31 1.579750 1.683941
2018-12-31 2.055667 -0.097864
2019-12-31 1.819000 1.961883
In [22]:
#Diesel Yearly Summarized Table
getYearlySummary(df_diesel)
Out[22]:
1 New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon) U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon) Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon) New York Harbor Monthly Change U.S. Gulf Coast Monthly Change Los Angeles, CA Monthly Change
Date
1996-12-31 NaN NaN 0.780333 NaN NaN -2.852502
1997-12-31 NaN NaN 0.678333 NaN NaN -1.275027
1998-12-31 NaN NaN 0.485917 NaN NaN -3.218516
1999-12-31 NaN NaN 0.631000 NaN NaN 6.840829
2000-12-31 NaN NaN 0.977917 NaN NaN 3.209220
2001-12-31 NaN NaN 0.816000 NaN NaN -4.702413
2002-12-31 NaN NaN 0.749917 NaN NaN 3.658444
2003-12-31 NaN NaN 0.914667 NaN NaN 1.809087
2004-12-31 NaN NaN 1.317667 NaN NaN 2.913440
2005-12-31 NaN NaN 1.789083 NaN NaN 3.346248
2006-12-31 1.971857 1.961857 2.082583 -1.798454 -2.307280 1.522250
2007-12-31 2.151417 2.145750 2.248167 3.254040 3.206231 2.210897
2008-12-31 2.973583 2.920333 2.908417 -3.996835 -4.069422 -4.281085
2009-12-31 1.694417 1.659250 1.696417 3.071251 3.373839 3.777950
2010-12-31 2.194083 2.155917 2.204083 2.035142 1.951068 1.976082
2011-12-31 3.014417 2.969500 3.048750 1.482042 1.497582 1.452001
2012-12-31 3.113250 3.052917 3.106083 0.518776 0.401727 0.405499
2013-12-31 3.014000 2.967833 3.012750 -0.017374 0.038096 0.158181
2014-12-31 2.804833 2.710833 2.785750 -3.164201 -3.875548 -3.530439
2015-12-31 1.663333 1.579167 1.660333 -4.231931 -3.542397 -2.949633
2016-12-31 1.351417 1.317500 1.386667 3.481512 3.589359 3.025174
2017-12-31 1.657833 1.622250 1.706417 1.552959 1.464490 1.365602
2018-12-31 2.093500 2.037583 2.127833 -0.401971 -0.573159 -0.264365
2019-12-31 1.846000 1.774000 1.812000 2.498612 5.157084 1.059677
In [23]:
#Jet Fuel Yearly Summarized Table
getYearlySummary(df_jet)
Out[23]:
1 U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon) U.S. Gulf Coast Monthly Change
Date
1990-12-31 0.761889 7.181180
1991-12-31 0.607167 -3.160445
1992-12-31 0.571083 0.485898
1993-12-31 0.529333 -1.445555
1994-12-31 0.493333 0.953750
1995-12-31 0.494333 1.184620
1996-12-31 0.610667 2.087636
1997-12-31 0.560083 -2.616431
1998-12-31 0.403500 -3.719445
1999-12-31 0.496083 7.536698
2000-12-31 0.848667 2.086970
2001-12-31 0.723667 -3.964171
2002-12-31 0.685333 4.085589
2003-12-31 0.825667 1.195667
2004-12-31 1.149917 3.210222
2005-12-31 1.710167 3.758593
2006-12-31 1.920750 0.596269
2007-12-31 2.129917 3.228612
2008-12-31 2.961667 -3.963651
2009-12-31 1.659250 3.491947
2010-12-31 2.146083 1.897716
2011-12-31 2.996083 1.453829
2012-12-31 3.056500 0.364183
2013-12-31 2.923417 0.130552
2014-12-31 2.696500 -3.822657
2015-12-31 1.525500 -3.639839
2016-12-31 1.249167 3.047348
2017-12-31 1.561417 1.936508
2018-12-31 2.017833 -0.322365
2019-12-31 1.784000 5.188679
In [24]:
#Propane Yearly Summarized Table
getYearlySummary(df_prop)
Out[24]:
1 Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon) Mont Belvieu, TX Propane Monthly Change
Date
1992-12-31 0.344000 -1.492449
1993-12-31 0.311667 -1.887934
1994-12-31 0.300167 2.679649
1995-12-31 0.320417 0.608008
1996-12-31 0.423250 4.888069
1997-12-31 0.374083 -4.409562
1998-12-31 0.261250 -3.674093
1999-12-31 0.340417 6.365428
2000-12-31 0.581583 5.788992
2001-12-31 0.472417 -7.223847
2002-12-31 0.409333 5.028779
2003-12-31 0.577750 2.408560
2004-12-31 0.742583 2.313509
2005-12-31 0.912500 3.005146
2006-12-31 1.013417 -0.553427
2007-12-31 1.207417 4.041372
2008-12-31 1.412083 -6.365971
2009-12-31 0.841833 6.237997
2010-12-31 1.165500 0.899653
2011-12-31 1.463333 0.662267
2012-12-31 1.003917 -4.133757
2013-12-31 1.000750 4.117555
2014-12-31 1.042000 -5.824235
2015-12-31 0.457000 -2.181722
2016-12-31 0.484417 4.811955
2017-12-31 0.764167 4.014253
2018-12-31 0.873917 -2.390769
2019-12-31 0.665000 -1.772526

Exploratory Analysis

In [25]:
#Functions needed for Exploratory Analysis
def getImportantMonths(data):
    comments=[]
    for i in data.columns:
            comments.append(str(i))
            comments.append("min: "+str(min(data[i].fillna(9999)))+" in "+str(data[data[i] == min(data[i].fillna(9999))].index.tolist()[0]))
            comments.append("max: "+str(max(data[i].fillna(-1)))+" in "+str(data[data[i] == max(data[i].fillna(-1))].index.tolist()[0]))
    return comments

def getDistribution(data):
    for i in data.columns:
        print("Distribution of "+str(i))
        plt.hist(data[i])
        plt.show()

Crude Oil Analysis

In [26]:
#Crude Oil Data Visualizations
trace0 = go.Scatter(
    x = df_crudeOil.index,
    y = df_crudeOil['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)'],
    name = 'Cushing, OK WTI',
    line = dict(
        color = ('green'),
        width = 2)
)
trace1 = go.Scatter(
    x = df_crudeOil.index,
    y = df_crudeOil['Europe Brent Spot Price FOB (Dollars per Barrel)'],
    name = 'Europe Brent',
    line = dict(
        color = ('orange'),
        width = 2,)
)


data = [trace0, trace1]

layout = dict(title = 'Crude Oil Analysis',
              xaxis = dict(title = 'Year'),
              yaxis = dict(title = 'Dollars per Barrel'),
              )

fig = dict(data=data, layout=layout)

offline.iplot(fig, filename='styled-line')
In [27]:
df_crudeOil.describe()
Out[27]:
1 Cushing, OK WTI Spot Price FOB (Dollars per Barrel) Europe Brent Spot Price FOB (Dollars per Barrel) Cushing, OK WTI Monthly Change Europe Brent Monthly Change
count 397.000000 381.000000 396.000000 380.000000
mean 43.808791 46.000394 0.580215 0.703086
std 29.460501 33.013959 8.665466 8.972040
min 11.350000 9.820000 -32.577410 -26.725342
25% 19.660000 18.670000 -4.673542 -4.804217
50% 29.840000 30.200000 1.136758 0.799366
75% 63.800000 67.490000 5.709207 6.337508
max 133.880000 132.720000 48.021680 58.241118
In [28]:
getImportantMonths(df_crudeOil)
Out[28]:
['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)',
 'min: 11.35 in 1998-12-15 00:00:00',
 'max: 133.88 in 2008-06-15 00:00:00',
 'Europe Brent Spot Price FOB (Dollars per Barrel)',
 'min: 9.82 in 1998-12-15 00:00:00',
 'max: 132.72 in 2008-07-15 00:00:00',
 'Cushing, OK WTI Monthly Change',
 'min: -32.57740950719581 in 1986-02-15 00:00:00',
 'max: 48.021680216802174 in 1990-08-15 00:00:00',
 'Europe Brent Monthly Change',
 'min: -26.725342274378317 in 2008-11-15 00:00:00',
 'max: 58.241118229470004 in 1990-08-15 00:00:00']
In [29]:
getDistribution(df_crudeOil)
Distribution of Cushing, OK WTI Spot Price FOB (Dollars per Barrel)
Distribution of Europe Brent Spot Price FOB (Dollars per Barrel)
Distribution of Cushing, OK WTI Monthly Change
Distribution of Europe Brent Monthly Change
In [30]:
cmap = sns.color_palette("Blues")
sns.heatmap(df_crudeOil.iloc[:,:2],cmap=cmap)
Out[30]:
<matplotlib.axes._subplots.AxesSubplot at 0x228fddbc7b8>
Findings from the Crude Oil Prices 1) Prices for crude oil in Crushing was maximum in June 2008 and in Europe Bent it was maximum in July 2008 2) The price percent change from July 1990 to August 1990 was almost 50 percent for both cities 3) The prices for both cities started to increase from 2004

Conventional Gasoline

In [31]:
trace0 = go.Scatter(
    x = df_cGasoline.index,
    y = df_cGasoline['New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)'],
    name = 'New York Harbor',
    line = dict(
        color = ('green'),
        width = 2)
)
trace1 = go.Scatter(
    x = df_cGasoline.index,
    y = df_cGasoline['U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)'],
    name = 'U.S. Gulf Coast',
    line = dict(
        color = ('orange'),
        width = 2,)
)


data = [trace0, trace1]

layout = dict(title = 'Conventional Gasoline Analysis',
              xaxis = dict(title = 'Year'),
              yaxis = dict(title = 'Dollars per Gallon'),
              )

fig = dict(data=data, layout=layout)

offline.iplot(fig, filename='styled-line')
In [32]:
df_cGasoline.describe()
Out[32]:
1 New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon) U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon) New York Harbor Conventional Gasoline Monthly Change U.S. Gulf Coast Conventional Gasoline Monthly Change
count 392.000000 392.000000 391.000000 391.000000
mean 1.267541 1.238556 0.793295 0.858009
std 0.829709 0.813193 9.777725 10.480756
min 0.307000 0.294000 -33.177083 -43.084767
25% 0.556750 0.548750 -4.872642 -5.501255
50% 0.886500 0.863500 0.770342 1.016949
75% 1.919250 1.866000 6.620545 6.704321
max 3.292000 3.284000 37.366003 43.853821
In [33]:
getImportantMonths(df_cGasoline)
Out[33]:
['New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)',
 'min: 0.307 in 1998-12-15 00:00:00',
 'max: 3.292 in 2008-06-15 00:00:00',
 'U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)',
 'min: 0.294 in 1998-12-15 00:00:00',
 'max: 3.284 in 2008-06-15 00:00:00',
 'New York Harbor Conventional Gasoline Monthly Change',
 'min: -33.177083333333336 in 2008-11-15 00:00:00',
 'max: 37.36600306278712 in 1990-08-15 00:00:00',
 'U.S. Gulf Coast Conventional Gasoline Monthly Change',
 'min: -43.08476736775015 in 2008-10-15 00:00:00',
 'max: 43.85382059800664 in 1990-08-15 00:00:00']
In [34]:
getDistribution(df_cGasoline)
Distribution of New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)
Distribution of U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)
Distribution of New York Harbor Conventional Gasoline Monthly Change
Distribution of U.S. Gulf Coast Conventional Gasoline Monthly Change
In [35]:
cmap = sns.color_palette("Blues")
sns.heatmap(df_cGasoline.iloc[:,:2],cmap=cmap)
Out[35]:
<matplotlib.axes._subplots.AxesSubplot at 0x228fdfb5f60>
Findings from Conventional Gasoline Price Analyses: 1) During June 2008, the prices for the both cities were the highest 2) The price percent change from July 1990 to August 1990 was almost 40 percent for both cities 3) The prices for both cities started to increase from 2004 4) Prices of Conventional Gasoline were mostly in the range (0,1) 5) In the year 2008, there was a steep down fall of prices witthin 6 months

Regular Gasoline

In [36]:
trace0 = go.Scatter(
    x = df_rGasoline.index,
    y = df_rGasoline['Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon)'],
    name = 'New York Harbor',
    line = dict(
        color = ('green'),
        width = 2)
)

data = [trace0]

layout = dict(title = 'Regular Gasoline Analysis',
              xaxis = dict(title = 'Year'),
              yaxis = dict(title = 'Dollars per Gallon'),
              )

fig = dict(data=data, layout=layout)

offline.iplot(fig, filename='styled-line')
In [37]:
df_rGasoline.describe()
Out[37]:
1 Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon) Los Angeles Reformulated RBOB Regular Gasoline Monthly Change
count 188.000000 187.000000
mean 2.147021 0.962776
std 0.631317 12.348753
min 0.949000 -40.329412
25% 1.642000 -6.344894
50% 2.079000 1.097017
75% 2.668500 7.790218
max 3.694000 57.525773
In [38]:
getImportantMonths(df_rGasoline)
Out[38]:
['Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon)',
 'min: 0.949 in 2003-09-15 00:00:00',
 'max: 3.694 in 2008-06-15 00:00:00',
 'Los Angeles Reformulated RBOB Regular Gasoline Monthly Change',
 'min: -40.32941176470588 in 2008-11-15 00:00:00',
 'max: 57.525773195876305 in 2016-03-15 00:00:00']
In [39]:
getDistribution(df_rGasoline)
Distribution of Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon)
Distribution of Los Angeles Reformulated RBOB Regular Gasoline Monthly Change
Findings from Regular Gasoline Price Analyses: 1) During June 2008, the prices for Regular Gasoline were the highest 2) The max price percent change was in 2016 was almost 60 percent in LA 3) The prices started to increase from 2004 4) Prices of Regular Gasoline were almost normally distributed

Heat Oil Analyses

In [40]:
#Heat Oil
trace0 = go.Scatter(
    x = df_heatOil.index,
    y = df_heatOil['New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon)'],
    name = 'New York Harbor',
    line = dict(
        color = ('green'),
        width = 2)
)

data = [trace0]

layout = dict(title = 'Heat Oil Analysis',
              xaxis = dict(title = 'Year'),
              yaxis = dict(title = 'Dollars per Gallon'),
              )

fig = dict(data=data, layout=layout)

offline.iplot(fig, filename='styled-line')
In [41]:
df_heatOil.describe()
Out[41]:
1 New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon) New York Harbor No. 2 Heating Oil Monthly Change
count 392.000000 391.000000
mean 1.278319 0.761170
std 0.885156 8.591302
min 0.304000 -23.928378
25% 0.539750 -4.399083
50% 0.840000 0.806452
75% 1.867000 5.610438
max 3.801000 41.807910
In [42]:
getImportantMonths(df_heatOil)
Out[42]:
['New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon)',
 'min: 0.304 in 1999-02-15 00:00:00',
 'max: 3.801 in 2008-06-15 00:00:00',
 'New York Harbor No. 2 Heating Oil Monthly Change',
 'min: -23.928377645143794 in 2008-12-15 00:00:00',
 'max: 41.807909604519764 in 1990-08-15 00:00:00']
In [43]:
getDistribution(df_heatOil)
Distribution of New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon)
Distribution of New York Harbor No. 2 Heating Oil Monthly Change
Findings from Heat Oil Price Analyses: 1) During June 2008, the prices for Heat Oil were the highest 2) The max price percent change was in 1990 was almost 40 percent in NY 3) The prices started to increase from 2004

Diesel Oil Price Analysis

In [44]:
#Diesel Oil
trace0 = go.Scatter(
    x = df_diesel.index,
    y = df_diesel['New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)'],
    name = 'New York Harbor',
    line = dict(
        color = ('rgb(228,26,28)'),
        width = 2)
)
trace1 = go.Scatter(
    x = df_diesel.index,
    y = df_diesel['U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)'],
    name = 'U.S. Gulf Coast',
    line = dict(
        color = ('rgb(55,126,184)'),
        width = 2,)
)
trace2 = go.Scatter(
    x = df_diesel.index,
    y = df_diesel['Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon)'],
    name = 'Los Angeles, CA',
    line = dict(
        color = ('rgb(77,175,74)'),
        width = 2,)
)


data = [trace0, trace1,trace2]

layout = dict(title = 'Diesel Oil Analysis',
              xaxis = dict(title = 'Year'),
              yaxis = dict(title = 'Dollars per Gallon'),
              )

fig = dict(data=data, layout=layout)

offline.iplot(fig, filename='styled-line')
In [45]:
df_diesel.describe()
Out[45]:
1 New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon) U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon) Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon) New York Harbor Monthly Change U.S. Gulf Coast Monthly Change Los Angeles, CA Monthly Change
count 152.000000 152.000000 274.000000 151.000000 151.000000 273.000000
mean 2.291855 2.244559 1.711139 0.229860 0.217588 0.687299
std 0.672420 0.667094 0.882817 7.785065 7.990918 9.590957
min 0.979000 0.958000 0.391000 -25.325013 -25.556158 -25.016790
25% 1.794750 1.768750 0.872250 -3.889112 -4.113479 -5.023022
50% 2.138000 2.105000 1.663000 0.885559 0.728700 1.267281
75% 2.959000 2.914000 2.285500 4.884422 5.415227 6.220984
max 3.887000 3.851000 3.894000 19.736842 21.020819 53.708440
In [46]:
getImportantMonths(df_diesel)
Out[46]:
['New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)',
 'min: 0.979 in 2016-01-15 00:00:00',
 'max: 3.887 in 2008-06-15 00:00:00',
 'U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)',
 'min: 0.958 in 2016-01-15 00:00:00',
 'max: 3.851 in 2008-06-15 00:00:00',
 'Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon)',
 'min: 0.391 in 1998-12-15 00:00:00',
 'max: 3.894 in 2008-06-15 00:00:00',
 'New York Harbor Monthly Change',
 'min: -25.325013000520023 in 2008-12-15 00:00:00',
 'max: 19.736842105263165 in 2009-06-15 00:00:00',
 'U.S. Gulf Coast Monthly Change',
 'min: -25.556158437330435 in 2008-12-15 00:00:00',
 'max: 21.020819341840145 in 2009-06-15 00:00:00',
 'Los Angeles, CA Monthly Change',
 'min: -25.01678979180658 in 2008-10-15 00:00:00',
 'max: 53.7084398976982 in 1999-03-15 00:00:00']
In [47]:
getDistribution(df_diesel)
Distribution of New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)
Distribution of U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)
Distribution of Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon)
Distribution of New York Harbor Monthly Change
Distribution of U.S. Gulf Coast Monthly Change
Distribution of Los Angeles, CA Monthly Change
Findings from Diesel Oil Price Analyses: 1) During June 2008, the prices for Diesel Oil were the highest in all the listed cities 2) The max price percent change was in 1990 was almost 60 percent in LA and other cities saw less max percent change comparatively in the year 2009 3) The prices started to increase from 2004

Jet Fuel Analysis

In [48]:
#Jet Fuel
trace0 = go.Scatter(
    x = df_jet.index,
    y = df_jet['U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon)'],
    name = 'U.S. Gulf Coast',
    line = dict(
        color = ('green'),
        width = 2)
)

data = [trace0]

layout = dict(title = 'Jet Fuel Analysis',
              xaxis = dict(title = 'Year'),
              yaxis = dict(title = 'Dollars per Gallon'),
              )

fig = dict(data=data, layout=layout)

offline.iplot(fig, filename='styled-line')
In [49]:
df_jet.describe()
Out[49]:
1 U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon) U.S. Gulf Coast Monthly Change
count 346.000000 345.000000
mean 1.396757 0.740796
std 0.911256 8.852279
min 0.304000 -31.407407
25% 0.565000 -4.032258
50% 1.087000 0.904159
75% 2.050000 6.088005
max 3.886000 47.850467
In [50]:
getImportantMonths(df_jet)
Out[50]:
['U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon)',
 'min: 0.304 in 1998-12-15 00:00:00',
 'max: 3.886 in 2008-07-15 00:00:00',
 'U.S. Gulf Coast Monthly Change',
 'min: -31.407407407407405 in 2008-10-15 00:00:00',
 'max: 47.850467289719624 in 1990-08-15 00:00:00']
In [51]:
getDistribution(df_jet)
Distribution of U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon)
Distribution of U.S. Gulf Coast Monthly Change
Findings from Jet Oil Price Analyses: 1) During July 2008, the prices for Jet Oil were the highest in U.S. Gulf Coast 2) The max price percent change was in 1990 was almost 50 percent 3) The prices started to increase from 2004

Propane Analysis

In [52]:
#Propane
trace0 = go.Scatter(
    x = df_prop.index,
    y = df_prop['Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon)'],
    name = 'Mont Belvieu, TX',
    line = dict(
        color = ('green'),
        width = 2)
)

data = [trace0]

layout = dict(title = 'Propane Analysis',
              xaxis = dict(title = 'Year'),
              yaxis = dict(title = 'Dollars per Gallon'),
              )

fig = dict(data=data, layout=layout)

offline.iplot(fig, filename='styled-line')
In [53]:
df_prop.describe()
Out[53]:
1 Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon) Mont Belvieu, TX Propane Monthly Change
count 320.000000 319.000000
mean 0.712997 0.689603
std 0.379146 9.731611
min 0.209000 -31.699346
25% 0.372000 -4.995965
50% 0.612500 0.619469
75% 0.968000 6.407099
max 1.862000 29.672897
In [54]:
getImportantMonths(df_prop)
Out[54]:
['Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon)',
 'min: 0.209 in 1998-12-15 00:00:00',
 'max: 1.862 in 2008-07-15 00:00:00',
 'Mont Belvieu, TX Propane Monthly Change',
 'min: -31.699346405228766 in 2008-10-15 00:00:00',
 'max: 29.672897196261694 in 2000-01-15 00:00:00']
In [55]:
getDistribution(df_prop)
Distribution of Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon)
Distribution of Mont Belvieu, TX Propane Monthly Change
Findings from Jet Oil Price Analyses: 1) During July 2008, the prices for Propane Oil were the highest in Mont Belvieu, TX 2) The max price percent change was in 2000 was almost 30 percent 3) The prices started to increase from 2002

Let's club all the datasets to get a view of the price of all of them combinedly

Transforming the unit Barrel to Gallon price of Crude Oil dataset to make proper comparison between the other datasets

In [56]:
df_crudeOil['Cushing, OK WTI Spot Price FOB (Dollars per Gallon)']=df_crudeOil['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)'].apply(lambda x: x/31.5)
df_crudeOil['Europe Brent Spot Price FOB (Dollars per Gallon)']=df_crudeOil['Europe Brent Spot Price FOB (Dollars per Barrel)'].apply(lambda x: x/31.5)
df_crudeOil
Out[56]:
1 Cushing, OK WTI Spot Price FOB (Dollars per Barrel) Europe Brent Spot Price FOB (Dollars per Barrel) Cushing, OK WTI Monthly Change Europe Brent Monthly Change Cushing, OK WTI Spot Price FOB (Dollars per Gallon) Europe Brent Spot Price FOB (Dollars per Gallon)
Date
1986-01-15 22.93 NaN NaN NaN 0.727937 NaN
1986-02-15 15.46 NaN -32.577410 NaN 0.490794 NaN
1986-03-15 12.61 NaN -18.434670 NaN 0.400317 NaN
1986-04-15 12.84 NaN 1.823949 NaN 0.407619 NaN
1986-05-15 15.38 NaN 19.781931 NaN 0.488254 NaN
1986-06-15 13.43 NaN -12.678804 NaN 0.426349 NaN
1986-07-15 11.59 NaN -13.700670 NaN 0.367937 NaN
1986-08-15 15.10 NaN 30.284728 NaN 0.479365 NaN
1986-09-15 14.87 NaN -1.523179 NaN 0.472063 NaN
1986-10-15 14.90 NaN 0.201748 NaN 0.473016 NaN
1986-11-15 15.22 NaN 2.147651 NaN 0.483175 NaN
1986-12-15 16.11 NaN 5.847569 NaN 0.511429 NaN
1987-01-15 18.65 NaN 15.766605 NaN 0.592063 NaN
1987-02-15 17.75 NaN -4.825737 NaN 0.563492 NaN
1987-03-15 18.30 NaN 3.098592 NaN 0.580952 NaN
1987-04-15 18.68 NaN 2.076503 NaN 0.593016 NaN
1987-05-15 19.44 18.58 4.068522 NaN 0.617143 0.589841
1987-06-15 20.07 18.86 3.240741 1.506997 0.637143 0.598730
1987-07-15 21.34 19.86 6.327853 5.302227 0.677460 0.630476
1987-08-15 20.31 18.98 -4.826617 -4.431017 0.644762 0.602540
1987-09-15 19.53 18.31 -3.840473 -3.530032 0.620000 0.581270
1987-10-15 19.86 18.76 1.689708 2.457673 0.630476 0.595556
1987-11-15 18.85 17.78 -5.085599 -5.223881 0.598413 0.564444
1987-12-15 17.28 17.05 -8.328912 -4.105737 0.548571 0.541270
1988-01-15 17.13 16.75 -0.868056 -1.759531 0.543810 0.531746
1988-02-15 16.80 15.73 -1.926445 -6.089552 0.533333 0.499365
1988-03-15 16.20 14.73 -3.571429 -6.357279 0.514286 0.467619
1988-04-15 17.86 16.60 10.246914 12.695180 0.566984 0.526984
1988-05-15 17.42 16.31 -2.463606 -1.746988 0.553016 0.517778
1988-06-15 16.53 15.54 -5.109070 -4.721030 0.524762 0.493333
... ... ... ... ... ... ...
2016-08-15 44.72 45.84 0.156775 1.979978 1.419683 1.455238
2016-09-15 45.18 46.57 1.028623 1.592496 1.434286 1.478413
2016-10-15 49.78 49.52 10.181496 6.334550 1.580317 1.572063
2016-11-15 45.66 44.73 -8.276416 -9.672859 1.449524 1.420000
2016-12-15 51.97 53.31 13.819536 19.181757 1.649841 1.692381
2017-01-15 52.50 54.58 1.019819 2.382292 1.666667 1.732698
2017-02-15 53.47 54.87 1.847619 0.531330 1.697460 1.741905
2017-03-15 49.33 51.59 -7.742659 -5.977766 1.566032 1.637778
2017-04-15 51.06 52.31 3.506994 1.395619 1.620952 1.660635
2017-05-15 48.48 50.33 -5.052879 -3.785127 1.539048 1.597778
2017-06-15 45.18 46.37 -6.806931 -7.868071 1.434286 1.472063
2017-07-15 46.63 48.48 3.209385 4.550356 1.480317 1.539048
2017-08-15 48.04 51.70 3.023804 6.641914 1.525079 1.641270
2017-09-15 49.82 56.15 3.705246 8.607350 1.581587 1.782540
2017-10-15 51.58 57.51 3.532718 2.422084 1.637460 1.825714
2017-11-15 56.64 62.71 9.810004 9.041906 1.798095 1.990794
2017-12-15 57.88 64.37 2.189266 2.647106 1.837460 2.043492
2018-01-15 63.70 69.08 10.055287 7.317073 2.022222 2.193016
2018-02-15 62.23 65.32 -2.307692 -5.442965 1.975556 2.073651
2018-03-15 62.73 66.02 0.803471 1.071647 1.991429 2.095873
2018-04-15 66.25 72.11 5.611350 9.224477 2.103175 2.289206
2018-05-15 69.98 76.98 5.630189 6.753571 2.221587 2.443810
2018-06-15 67.87 74.41 -3.015147 -3.338529 2.154603 2.362222
2018-07-15 70.98 74.25 4.582290 -0.215025 2.253333 2.357143
2018-08-15 68.06 72.53 -4.113835 -2.316498 2.160635 2.302540
2018-09-15 70.23 78.89 3.188363 8.768785 2.229524 2.504444
2018-10-15 70.75 81.03 0.740424 2.712638 2.246032 2.572381
2018-11-15 56.96 64.75 -19.491166 -20.091324 1.808254 2.055556
2018-12-15 49.52 57.36 -13.061798 -11.413127 1.572063 1.820952
2019-01-15 51.38 59.41 3.756058 3.573919 1.631111 1.886032

397 rows × 6 columns

Visualizing all time series data together

In [57]:
trace0 = go.Scatter(
    x = df_crudeOil.index,
    y = df_crudeOil['Cushing, OK WTI Spot Price FOB (Dollars per Gallon)'],
    name = 'Cushing, OK WTI-Crude Oil',
    line = dict(
        color = ('rgb(166,206,227)'),
        width = 2)
)
trace1 = go.Scatter(
    x = df_crudeOil.index,
    y = df_crudeOil['Europe Brent Spot Price FOB (Dollars per Gallon)'],
    name = 'Europe Brent-Crude Oil',
    line = dict(
        color = ('rgb(31,120,180)'),
        width = 2,)
)
trace2 = go.Scatter(
    x = df_cGasoline.index,
    y = df_cGasoline['New York Harbor Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)'],
    name = 'New York Harbor-Conventional Gasoline',
    line = dict(
        color = ('rgb(178,223,138)'),
        width = 2)
)
trace3 = go.Scatter(
    x = df_cGasoline.index,
    y = df_cGasoline['U.S. Gulf Coast Conventional Gasoline Regular Spot Price FOB (Dollars per Gallon)'],
    name = 'U.S. Gulf Coast-Conventional Gasoline',
    line = dict(
        color = ('rgb(51,160,44)'),
        width = 2,)
)
trace4 = go.Scatter(
    x = df_rGasoline.index,
    y = df_rGasoline['Los Angeles Reformulated RBOB Regular Gasoline Spot Price (Dollars per Gallon)'],
    name = 'Los Angeles-RBOB Regular Gasoline',
    line = dict(
        color = ('rgb(255,255,153)'),
        width = 2,)
)
trace5 = go.Scatter(
    x = df_heatOil.index,
    y = df_heatOil['New York Harbor No. 2 Heating Oil Spot Price FOB (Dollars per Gallon)'],
    name = 'New York Harbor-Heating Oil',
    line = dict(
        color = ('rgb(177,89,40)'),
        width = 2,)
)
trace6 = go.Scatter(
    x = df_diesel.index,
    y = df_diesel['New York Harbor Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)'],
    name = 'New York Harbor-Diesel',
    line = dict(
        color = ('rgb(253,191,111)'),
        width = 2,)
)
trace7 = go.Scatter(
    x = df_diesel.index,
    y = df_diesel['U.S. Gulf Coast Ultra-Low Sulfur No 2 Diesel Spot Price (Dollars per Gallon)'],
    name = 'U.S. Gulf Coast-Diesel',
    line = dict(
        color = ('rgb(255,127,0)'),
        width = 2,)
)
trace8 = go.Scatter(
    x = df_diesel.index,
    y = df_diesel['Los Angeles, CA Ultra-Low Sulfur CARB Diesel Spot Price (Dollars per Gallon)'],
    name = 'Los Angeles, CA-Diesel',
    line = dict(
        color = ('rgb(227,26,28)'),
        width = 2,)
)
trace9 = go.Scatter(
    x = df_jet.index,
    y = df_jet['U.S. Gulf Coast Kerosene-Type Jet Fuel Spot Price FOB (Dollars per Gallon)'],
    name = 'U.S. Gulf Coast-Jet Fuel',
    line = dict(
        color = ('rgb(251,154,153)'),
        width = 2,)
)
trace10 = go.Scatter(
    x = df_prop.index,
    y = df_prop['Mont Belvieu, TX Propane Spot Price FOB (Dollars per Gallon)'],
    name = 'Mont Belvieu, TX-Propane',
    line = dict(
        color = ('rgb(106,61,154)'),
        width = 2,)
)



data = [trace0, trace1,trace2,trace3,trace4,trace5,trace6,trace7,trace8,trace9,trace10]

layout = dict(title = 'Analysis of All Pretroleum fuels',
              xaxis = dict(title = 'Year'),
              yaxis = dict(title = 'Dollars per Gallon'),
              )

fig = dict(data=data, layout=layout)

offline.iplot(fig, filename='styled-line')

Creating heatmap to see the price distribution of products across the years

In [58]:
combined_data = pd.concat([df_crudeOil.iloc[:,4:], df_cGasoline.iloc[:,:2],df_rGasoline.iloc[:,:1],df_heatOil
.iloc[:,:1],df_diesel.iloc[:,:3],df_jet.iloc[:,:1],df_prop.iloc[:,:1]], axis=1, sort=False)
cmap = sns.color_palette("Blues")
sns.heatmap(combined_data,cmap=cmap)
Out[58]:
<matplotlib.axes._subplots.AxesSubplot at 0x228fde920b8>

Plotting the coorelation plot to see relation between all the variables

In [59]:
combined_data.columns=[['Crushing Crude','Eurupe Crude','NY C Gas','Gulf Coast C Gas','LA R Gas','NY Heat','NY Diesel','Gulf Coast Diesel','LA Diesel','Gulf Coast Kerosene','TX Prop']]
g = sns.pairplot(combined_data)
In [60]:
#Montly Change HeatMap
combined_data_monthly_change = pd.concat([df_crudeOil.iloc[:,2:4], df_cGasoline.iloc[:,2:],df_rGasoline.iloc[:,1:],df_heatOil
.iloc[:,1:],df_diesel.iloc[:,3:],df_jet.iloc[:,1:],df_prop.iloc[:,1:]], axis=1, sort=False)
cmap = sns.color_palette("Blues")
sns.heatmap(combined_data_monthly_change,cmap=cmap)
Out[60]:
<matplotlib.axes._subplots.AxesSubplot at 0x228832ab5c0>

Identification the correlation between Conventional Gasoline prices between New York Harbor and U.S. Gulf Coast

In [61]:
g = sns.pairplot(df_cGasoline.iloc[:,:2])
In [62]:
np.corrcoef(df_cGasoline.iloc[:,0],df_cGasoline.iloc[:,1])
Out[62]:
array([[1.        , 0.99802942],
       [0.99802942, 1.        ]])

We can see strong positive coorelation between both the cities' prices for conventional Gasoline

Finding out a timeframe where prices of all Products were lower than prior data points

In [63]:
def checkIfLowerThanPrior(prior,current):
    to_check=current-prior
    if ((to_check <0).sum() == to_check.size).astype(np.int)==1:
        return True
    else:
        return False
In [64]:
#Checking for monthly data
timeframe=[]
for row in range(0,len(combined_data)):
    if np.isnan(np.array(combined_data.iloc[row-1,:])).any() or np.isnan(np.array(combined_data.iloc[row,:])).any():
        continue
    else:
        if checkIfLowerThanPrior(np.array(combined_data.iloc[row-1,:]),np.array(combined_data.iloc[row,:])):
            timeframe.append(combined_data.index[row])
#Plot to represent the findings
print("Plot to represent the times when all Products prices were lower than their prior data")
X = pd.Series(timeframe).apply(lambda x: str(x).split('-')[1])
Y = pd.Series(timeframe).apply(lambda x: str(x).split('-')[0])
Z = np.ones(len(X))
plot_data = pd.DataFrame({'Month': X, 'Year': Y, 'True/False': Z})
data_pivoted = plot_data.pivot("Month", "Year", "True/False")
ax = sns.heatmap(data_pivoted)

plt.show()
Plot to represent the times when all Products prices were lower than their prior data
Using data from consecutive months, we can see that in the year 2008 and 2014, during the October, Novemeber and December; we see constant downfall in the prices from the previous months

Prediction the crude oil price for next 6 months

Predicting prices for Cushing, OK WTI Spot Price FOB in Barrels

In [65]:
#Check for stationarity of the time series
from statsmodels.tsa.stattools import adfuller
X = df_crudeOil['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)']
result = adfuller(X)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
ADF Statistic: -1.743708
p-value: 0.408745

As p-value > 0.05, we can say that the series is non stationary Lets take log of the Series and check if that is make it stationary

In [66]:
X = np.log(X)
result = adfuller(X)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
ADF Statistic: -1.964412
p-value: 0.302392

Again, our hull hypotheisis will be rejected in this case. The series stays to be non-stationary.

Let's check the Trend, seasonality and residual of the time series

As the series is non-stationary, we can apply S-ARIMA model for forecasting the prices of the of Crude Oil at Cushing

In [67]:
#Divide information into training and testing data
y = df_crudeOil[:'2010'].resample('MS').mean()
df_train=y[:'2010']
df_test=y['2010':]
#Decompose to see the trend and seasonality
decomposition = sm.tsa.seasonal_decompose(df_train['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)'], model='multiplicative')
fig = decomposition.plot()
plt.show()

Trend and observed graph show similar trend, it will be a multiplicative model.

Here, we try to get the best possible values p,q, r for our seasonal ARIMA model (that is which has the least AIC value)

In [68]:
p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in list(itertools.product(p, d, q))]
min_val=9999999
comment=""
for param in pdq:
    for param_seasonal in seasonal_pdq:
        mod = sm.tsa.statespace.SARIMAX(df_train[['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)']],
                                        order=param,
                                        seasonal_order=param_seasonal,
                                        enforce_stationarity=False,
                                        enforce_invertibility=False)
        results = mod.fit()
        #print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic))
        if(int(results.aic)<min_val):
            min_val=int(results.aic)
            comment=('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic))
print(str("Minimum AIC value is obtained when: ")+comment)
            
        
Minimum AIC value is obtained when: ARIMA(1, 1, 1)x(0, 1, 1, 12)12 - AIC:1502.443817496256

Using the min AIC value found combinations of p,q,r for training S-ARIMA model

In [69]:
mod = sm.tsa.statespace.SARIMAX(df_train[['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)']],
                                order=(1, 1, 1),
                                seasonal_order=(0, 1, 1, 12),
                                enforce_stationarity=False,
                                enforce_invertibility=False)
results = mod.fit()
print(results.summary().tables[1])
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.5930      0.063      9.476      0.000       0.470       0.716
ma.L1         -0.1982      0.093     -2.133      0.033      -0.380      -0.016
ma.S.L12      -0.9565      0.059    -16.315      0.000      -1.071      -0.842
sigma2        12.9744      0.778     16.671      0.000      11.449      14.500
==============================================================================
In [70]:
results.plot_diagnostics(figsize=(16, 8))
plt.show()
Diagonostics show that everything seems to be perfect, with a good normal distribution. Now, we predict on the test data to see the results
In [71]:
pred = results.get_prediction(start=pd.to_datetime('2010-01-01'), dynamic=False)
pred_ci = pred.conf_int()
ax = y[['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)']].plot(label='Cushing, OK WTI Spot Price FOB (Dollars per Barrel)')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7, figsize=(14, 7))
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1], color='k', alpha=.2)
ax.set_xlabel('Date')
ax.set_ylabel('Dollars per Barrel')
plt.legend()
plt.show()

rms = sqrt(mean_squared_error(df_test[['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)']], pred.predicted_mean))
print("Root mean squered error: "+str(rms))
Root mean squered error: 5.168870748570945

The visualization shows that the predictation was a correctede made. Therefore, we move ahead for training the model now with the complete dataset.

In [72]:
mod = sm.tsa.statespace.SARIMAX(df_crudeOil[['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)']],
                                order=(1, 1, 1),
                                seasonal_order=(0, 1, 1, 12),
                                enforce_stationarity=False,
                                enforce_invertibility=False)
results = mod.fit()
print(results.summary().tables[1])
results.plot_diagnostics(figsize=(16, 8))
plt.show()
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.4611      0.078      5.940      0.000       0.309       0.613
ma.L1         -0.1083      0.102     -1.061      0.288      -0.308       0.092
ma.S.L12      -1.0000    166.972     -0.006      0.995    -328.259     326.259
sigma2        16.1854   2702.814      0.006      0.995   -5281.232    5313.603
==============================================================================

Again, everything seems good.

In [73]:
#Predict for next 6 months
pred = results.forecast(6)

pred=pd.DataFrame(pred)
date_list=['2019-02-15','2019-03-15','2019-04-15','2019-05-15','2019-06-15','2019-07-15']
pred.index=pd.to_datetime(date_list)

trace0 = go.Scatter(
    x = df_crudeOil.index,
    y = df_crudeOil['Cushing, OK WTI Spot Price FOB (Dollars per Barrel)'],
    name = 'Cushing, OK WTI-Crude Oil',
    line = dict(
        color = ('rgb(166,206,227)'),
        width = 2)
)
trace1 = go.Scatter(
    x = pred.index,
    y = pred[0],
    name = 'Forecasted',
    line = dict(
        color = ('blue'),
        width = 2)
)
data = [trace0,trace1]

layout = dict(title = 'Crude Oil - Cushing, OK WTI Spot Price FOB Forecast for next 6 months',
              xaxis = dict(title = 'Year'),
              yaxis = dict(title = 'Dollars per Barrel'),
              )

fig = dict(data=data, layout=layout)
offline.iplot(fig, filename='styled-line')

The forcasts seems to be aligned with it previous value

Predicting Price for Crude at Europe Bent in Barrels

In [74]:
#X=pd.DataFrame(df_crudeOil['Europe Brent Spot Price FOB (Dollars per Barrel)'])
X=[x for x in df_crudeOil['Europe Brent Spot Price FOB (Dollars per Barrel)'] if np.isnan(x)!=True]
result = adfuller(X)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
ADF Statistic: -2.025794
p-value: 0.275379

Series is non - stationary, p-value > 0.05

In [75]:
X = np.log(X)
result = adfuller(X)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
ADF Statistic: -1.642520
p-value: 0.460944

Log values of time series, also says that it is non-stationary

Predict the prices using seasonal ARIMA

Lets start with seeing the decomposition of data

In [76]:
y = df_crudeOil[:'2010'].resample('MS').mean()
y=y.dropna()
df_train=y[:'2010']
df_test=y['2010':]
df_train
#Decompose to see the trend and seasonality
decomposition = sm.tsa.seasonal_decompose(df_train['Europe Brent Spot Price FOB (Dollars per Barrel)'], model='multiplicative')
fig = decomposition.plot()
plt.show()
In [77]:
p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in list(itertools.product(p, d, q))]
min_val=9999999
comment=""
for param in pdq:
    for param_seasonal in seasonal_pdq:
        mod = sm.tsa.statespace.SARIMAX(df_train[['Europe Brent Spot Price FOB (Dollars per Barrel)']],
                                        order=param,
                                        seasonal_order=param_seasonal,
                                        enforce_stationarity=False,
                                        enforce_invertibility=False)
        results = mod.fit()
        #print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic))
        if(int(results.aic)<min_val):
            min_val=int(results.aic)
            comment=('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic))
print(str("Minimum AIC value is obtained when: ")+comment)
            
        
Minimum AIC value is obtained when: ARIMA(1, 1, 1)x(0, 1, 1, 12)12 - AIC:1447.3098593150075

Similar to what we did before, we take the found combination and feed it to the SARIMA Model and its diagnostics

In [78]:
mod = sm.tsa.statespace.SARIMAX(df_train[['Europe Brent Spot Price FOB (Dollars per Barrel)']],
                                order=(1, 1, 1),
                                seasonal_order=(0, 1, 1, 12),
                                enforce_stationarity=False,
                                enforce_invertibility=False)
results = mod.fit()
print(results.summary().tables[1])
results.plot_diagnostics(figsize=(16, 8))
plt.show()
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.5162      0.081      6.379      0.000       0.358       0.675
ma.L1         -0.1372      0.113     -1.210      0.226      -0.359       0.085
ma.S.L12      -1.0376      0.087    -11.900      0.000      -1.209      -0.867
sigma2        13.8474      1.661      8.339      0.000      10.593      17.102
==============================================================================

Everything seems good, so we move ahead with the visualizing the predictions

In [79]:
pred = results.get_prediction(start=pd.to_datetime('2010-01-01'), dynamic=False)
pred_ci = pred.conf_int()
ax = y[['Europe Brent Spot Price FOB (Dollars per Barrel)']].plot(label='Europe Brent Spot Price FOB (Dollars per Barrel)')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7, figsize=(14, 7))
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1], color='k', alpha=.2)
ax.set_xlabel('Date')
ax.set_ylabel('Price in Barrels')
plt.legend()
plt.show()

rms = sqrt(mean_squared_error(df_test[['Europe Brent Spot Price FOB (Dollars per Barrel)']], pred.predicted_mean))
print("Root mean squered error: "+str(rms))
Root mean squered error: 4.681480507769938

The model could correctly get the trend and predict the results. Now, we will train the model with complete data.

In [80]:
mod = sm.tsa.statespace.SARIMAX(df_crudeOil[['Europe Brent Spot Price FOB (Dollars per Barrel)']],
                                order=(1, 1, 1),
                                seasonal_order=(0, 1, 1, 12),
                                enforce_stationarity=False,
                                enforce_invertibility=False)
results = mod.fit()
print(results.summary().tables[1])
results.plot_diagnostics(figsize=(16, 8))
plt.show()
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.3229      0.083      3.900      0.000       0.161       0.485
ma.L1          0.0303      0.102      0.296      0.767      -0.170       0.231
ma.S.L12      -1.0000    246.049     -0.004      0.997    -483.247     481.247
sigma2        17.7862   4376.580      0.004      0.997   -8560.153    8595.726
==============================================================================

All seems good, we therefore, visualize the forecasted data

In [81]:
#Predict for next 6 months
pred = results.forecast(6)

pred=pd.DataFrame(pred)
date_list=['2019-02-15','2019-03-15','2019-04-15','2019-05-15','2019-06-15','2019-07-15']
pred.index=pd.to_datetime(date_list)

trace0 = go.Scatter(
    x = df_crudeOil.index,
    y = df_crudeOil['Europe Brent Spot Price FOB (Dollars per Barrel)'],
    name = 'Europe Brent - Crude Oil',
    line = dict(
        color = ('rgb(166,206,227)'),
        width = 2)
)
trace1 = go.Scatter(
    x = pred.index,
    y = pred[0],
    name = 'Forecasted',
    line = dict(
        color = ('blue'),
        width = 2)
)
data = [trace0,trace1]

layout = dict(title = 'Crude Oil - Europe Brent Spot Price FOB Forecast for next 6 months',
              xaxis = dict(title = 'Year'),
              yaxis = dict(title = 'Dollars per Barrel'),
              )

fig = dict(data=data, layout=layout)
offline.iplot(fig, filename='styled-line')